#Nature's Kidneys: the Role of Wetland Reserve Easements in Restoring Water Quality
#Nicole Karwowski and Marin Skidmore
#Replication File
#2025

#Merge all the data together to build a panel
#This is the huc12_year_month panel
#Incorporate SNAPD, easements, and control datasets
#Generate other variables for analysis
#Label variables for tables

#open libraries
library(fst)
library(ggplot2)
library(raster)
library(sf)
library(dplyr)
library(tidyr)
library(readr)
library(mapview)
library(fixest)
library(collapse)
library(Hmisc)

#setwd
#setwd("C:/Users/16308/OneDrive - Montana State University/UW_Madison/Water quality/Replication Package/Data")
setwd("C:/Users/n57m645/OneDrive - Montana State University/UW_Madison/Water quality/Replication Package/Data")

#open needed data sets
#SNAPD wq huc12
load("SNAPD/Processed/SNAPwq_huc_ym.RData")

#NRCS WRP huc12
load("NRCS easements/Processed/easements_panel_huc_ym.RData")

#prism temp and ppt
load("PRISM/Processed/tmp_huc12_monthly_1980_2018.RData")
load("PRISM/Processed/ppt_huc12_monthly_1980_2018.RData")

#huc12 details
huc12<-st_read("WatershedBoundaries/Raw/USGS_WBD")

#LCMAP land use
load("LCMAP/Processed/lcmap_landcover_huc12_panel.RData")

#CDL land use
load("CDL/Processed/cdl_landcover_huc12_year_panel.RData")

#ECHO permits
load("ECHO/Processed/nitrogen_point_huc12_year_panel.RData")
load("ECHO/Processed/phosphorous_point_huc12_year_panel.RData")

#closest river
load("Rivers_Streams/Processed/nn_rivers_huc12.RData")

#county data
load("CountyBoundaries/Processed/Huc12County.RData")

#NASS animal units
load("NassAnimalUnits/Processed/AnimalUnits_hucyear.RData")

#population
load("Population/Processed/pop_huc12.RData")

#TRI point emissions
load("TRI/Processed/tri_huc12_panel.Rda")

#Streamflow 
load("Streamflow/Processed/streamflow_monthyear.Rda")

################################################################################
#Merging

#Part I: merge SNAPD and WRP
merge<-merge(wq_huc12, ease_huc12, by=c("huc12", "YearMonth"), all.x=TRUE, all.y=FALSE)

#fill in the missing with zeroes for new easements
merge<- merge %>% mutate_at(c(35:41), ~replace_na(.,0))

#to turn in into a date, need to add the day. put in 01 for it. 
merge$YearMonthDate<-paste0(merge$Year, sep="-", merge$Month, sep="-", "01")
merge$YearMonthDate<-as.Date(merge$YearMonthDate)

merge<-merge %>% relocate(YearMonthDate, .after=YearMonth)

#create a cumulative count of easement acres
merge<-merge %>% arrange(huc12, YearMonthDate) 

merge$ApplyCum<-ave(merge$ApplyEasementAcres, merge$huc12, FUN=cumsum)
merge$ApplyCropCum<-ave(merge$ApplyEasementCropAcres, merge$huc12, FUN=cumsum)

merge$ClosedCum<-ave(merge$ClosedEasementAcres, merge$huc12, FUN=cumsum)
merge$ClosedCropCum<-ave(merge$ClosedEasementCropAcres, merge$huc12, FUN=cumsum)

merge$RestoredCum<-ave(merge$RestoredEasementAcres, merge$huc12, FUN=cumsum)
merge$RestoredCropCum<-ave(merge$RestoredEasementCropAcres, merge$huc12, FUN=cumsum)
merge$RestoredProjectsCum<-ave(merge$RestoredProjects, merge$huc12, FUN=cumsum)

#Part II: add prism temp and ppt to our main dataset
prism<-merge(ppt, tmp, by=c("huc12", "YearMonth"), all.x=T, all.y=T)
rm(ppt)
rm(tmp)

merge<-merge(merge, prism, by=c("huc12", "YearMonth"), all.x=TRUE, all.y=FALSE)
summary(merge)
rm(prism)

colnames(merge)

#Part III: add huc12 details (area, states, to huc12)

huc12<-huc12[, c(3,4, 6:9, 11:15, 17, 20)]
huc12<-huc12[,c(3, 1, 6, 5)]
huc12<-subset(as.data.frame(huc12), select=-geometry)

huc12$areaacres<-as.numeric(huc12$areaacres)

#merge huc details
merge<-merge(merge, huc12, by="huc12", all.x=TRUE, all.y=FALSE)

merge<-merge %>% relocate(Ppt, .after=Month)
merge<-merge %>% relocate(Temp, .after=Ppt)
merge<-merge %>% relocate(areaacres, .after=Temp)
merge<-merge %>% relocate(states, .after=areaacres)
merge<-merge %>% relocate(tohuc, .after=states)

#add a huc 8 variable
merge$huc8<-substring(merge$huc12, 1, 8)
merge<-merge %>% relocate(huc8, .after=huc12)

#Part IV: Create some other treatment variables
#Treatment defining
#focus on restoration first
#focus on total acres not cropland ones. 

#Binary
merge$RestorationExists<-ifelse(merge$RestoredCum>0,1,0)
table(merge$RestorationExists, useNA="always")

merge$ClosingExists<-ifelse(merge$ClosedCum>0,1,0)

#percentage of huc12 area
summary(merge$areaacres, useNA="always")

merge$ApplyCumP<-merge$ApplyCum/merge$areaacres
merge$ApplyCropCumP<-merge$ApplyCropCum/merge$areaacres
merge$ClosedCumP<-merge$ClosedCum/merge$areaacres
merge$ClosedCropCumP<-merge$ClosedCropCum/merge$areaacres
merge$RestoredCumP<-merge$RestoredCum/merge$areaacres
merge$RestoredCropCumP<-merge$RestoredCropCum/merge$areaacres

summary(merge)

#Part V: merge CDL data
cdl_huc12$year<-as.character(cdl_huc12$year)

merge<-merge(merge, cdl_huc12, by.x=c("huc12", "Year"), by.y=c("huc12", "year"), all.x=TRUE, all.y=FALSE)

#note that the cdl panel is shorter than the full one. not all years available. 

#Part VI: add npdes nitrogen and phosphorous data from ECHO

npermits$Year<-as.character(npermits$Year)
ppermits$Year<-as.character(ppermits$Year)

merge<-merge(merge, npermits, by.x=c("huc12", "Year"), by.y=c("huc12", "Year"), all.x=TRUE, all.y=FALSE)
merge<-merge(merge, ppermits, by.x=c("huc12", "Year"), by.y=c("huc12", "Year"), all.x=TRUE, all.y=FALSE)

table(is.na(merge$NitrogenFacilities))
table(is.na(merge$PhosphorousFacilities))

#the rest are zeros when they are after 2007. 
merge$NitrogenFacilities<-ifelse(is.na(merge$NitrogenFacilities) & as.numeric(merge$Year)>2007, 0, merge$NitrogenFacilities)
merge$PhosphorousFacilities<-ifelse(is.na(merge$PhosphorousFacilities) & as.numeric(merge$Year)>2007, 0, merge$PhosphorousFacilities)

summary(merge)

#Part VII: merge LCMAP land use data
merge<-merge(merge, lcmap_huc12, by.x=c("huc12", "Year"), by.y=c("huc12", "year"), 
             all.x=TRUE, all.y=FALSE )

#Part VIII: nearest river or stream
colnames(river_nearest_huc12)<-c("huc12", "riverid", "rivername", "riverfeature", "rivermiles", "distancetoriver_meters")

merge<-merge(merge, river_nearest_huc12, by="huc12", all.x=TRUE, all.y=FALSE)
table(is.na(merge$distancetoriver_meters))

#Part IX
#county merger (pick county that has the largest area in the huc12 if more than one)

#order by huc12 group and keep 1st. 
county_huc12_key<- county_huc12_key %>% group_by(huc12) %>% mutate(order=row_number())
keym<-subset(county_huc12_key, order==1)

keym<-keym[,c(1,2,3,6)]

#merge by huc number
merge<- merge(merge, keym, by.x="huc12", by.y="huc12", all.x=TRUE, all.y=FALSE)
table(is.na(merge$fips))

missing<-subset(merge, is.na(merge$fips))
table(missing$huc12)

#NOTE: fill in these four missing huc fips!
missing_huc<-as.data.frame(unique(missing[,2]))
colnames(missing_huc)<-"huc12"

#map them out
huc12<-st_read("WatershedBoundaries/Raw/USGS_WBD")

missing_huc_geo<-subset(huc12, huc12 %in% c("080802060800" , "080801030900" ,
                                              "080903020900", "080903010700" ))
mapview(missing_huc_geo)

#these 4 hucs do not have fips becuase they are in the ocean
#they are on the shores of Louisiana 
#we use the fips that they are bordering

merge$fips<-ifelse(merge$huc12=="080802060800", "22023", merge$fips)
merge$fips<-ifelse(merge$huc12=="080801030900", "22045", merge$fips)
merge$fips<-ifelse(merge$huc12=="080903020900", "22109", merge$fips)
merge$fips<-ifelse(merge$huc12=="080903010700", "22057", merge$fips)

#Part X
#merge NASS animals. note i took weighted average of the county year measures
merge<-merge(merge, animals_huc12, by.x=c("huc12", "Year"), by.y=c("huc12", "Year"), all.x=TRUE, all.y=FALSE)

#assume 0 for huc12s with missing AnUnits after 1997
table(is.na(merge$AnimalUnits))
merge$AnimalUnits<-ifelse(is.na(merge$AnimalUnits) & as.numeric(merge$Year)>1996, 0, merge$AnimalUnits)
table(is.na(merge$AnimalUnits))

#Part XI
#population estimates for huc12
merge<-merge(merge, pop_huc12, by.x=c("huc12"), by.y=c("huc12"), all.x=TRUE, all.y=FALSE)

table(is.na(merge$pop_est))

#Part XII
#merge in stream flow
colnames(streamflow_year)<-c("huc12", "Year", "Month", "streamflow")
streamflow_year$Year<-as.character(streamflow_year$Year)
streamflow_year$Month<-as.character(streamflow_year$Month)
streamflow_year$Month<-str_pad(streamflow_year$Month, 2, pad="0")
streamflow_year$YearMonth<-paste(streamflow_year$Year, streamflow_year$Month, sep="")

streamflow_year<-subset(streamflow_year, select=-c(Year, Month))

merge<-merge(merge, streamflow_year, by=c("huc12", "YearMonth"), all.x=TRUE, all.y=FALSE)
table(is.na(merge$streamflow))

missing<-subset(merge, is.na(merge$streamflow))
table(missing$Year)
table(streamflow_year$YearMonth)
#only goes to 2016

#if missing, fill in by huc12-month average
merge <-merge %>% group_by(huc12, Month) %>% mutate(streamflow=ifelse(is.na(streamflow), mean(streamflow, na.rm=TRUE), streamflow))
summary(merge$streamflow)

#Part XIII
#upstream water quality and upstream wetland easements 

#shorten the WRP panel to create upstream WRP measures
ease<-subset(ease_huc12, select=c("huc12", "YearMonth", "ClosedEasementAcres", "RestoredEasementAcres"))

#create the huc12 upstream-downstream key for all huc12
relate<-st_drop_geometry(subset(huc12, select=c(huc12, tohuc)))
relate<-unique(relate)
colnames(relate)<-c("upstream", "downstream")

#for each huc12-month-year create a running total of WRP acres

#build a matrix for all huc12-month-year combinations
st<-as.Date("1985-01-01")
en<-as.Date("2020-01-01")
time<-as.data.frame(seq(st +1, en+1, by="1 months"))
colnames(time)<-"date"
time$YearMonth<-paste0(substring(time$date, 1, 4), substring(time$date, 6, 7))
time<-subset(time, select="YearMonth")

units<-st_drop_geometry(subset(huc12, select="huc12"))
units<-unique(units)

#the cross command will create a matrix of all the huc12 with all the month-years
panel<-crossing(units, time)

#okay now we want to add in the wetland easements to the panel
#this tells us when a new WRP was added in a huc12
panel<-merge(panel, ease, by=c("huc12", "YearMonth"), all.x=TRUE, all.y=FALSE)

#add streamflow into the panel
panel<-merge(panel, streamflow_year, by=c("huc12", "YearMonth"), all.x=TRUE, all.y=FALSE)

#if there is no wetland easement added that year, assume its 0. 
#change the NAs into 0
panel$ClosedEasementAcres<-ifelse(is.na(panel$ClosedEasementAcres), 0, panel$ClosedEasementAcres)
panel$RestoredEasementAcres<-ifelse(is.na(panel$RestoredEasementAcres), 0, panel$RestoredEasementAcres)

#create a running sum
#its already sorted by huc12 and time correctly
list(panel)
panel$UpstreamClosedCum<-ave(panel$ClosedEasementAcres, panel$huc12, FUN=cumsum)
panel$UpstreamRestoredCum<-ave(panel$RestoredEasementAcres, panel$huc12, FUN=cumsum)


#for each huc12-Year-Month in our sample we want to know the upstream wetland easement total. 
#these are the huc12-year month combinations of interest
a1<-subset(merge, select=c(huc12, YearMonth))

#what are the huc12s that are upstream from those huc12 of interest?
a2<-merge(a1, relate, by.x="huc12", by.y="downstream", all.x=TRUE, all.y=FALSE)
table(is.na(a2$upstream))
#note that we do not always have an upstream huc12. so these are true "NAs"

#merge the WRP easement from our created panel to a2
#we gain variables here because earlier we did not always have the huc12-yearmonth combination
a3<-merge(a2, panel, by.x=c("upstream", "YearMonth"), by.y=c("huc12", "YearMonth"), all.x=TRUE, all.y=FALSE)

#we then collapse the dataset by Year-Month and huc12 to get a unique observation
#there are times when there are multiple huc12s upstream from a huc12
#so we take the average of the upstream huc12
a4<-collap(a3, UpstreamClosedCum+UpstreamRestoredCum~YearMonth+huc12, FUN=fmean, na.rm=TRUE)
summary(a4)

colnames(a4)<-c("YearMonth", "huc12", "UpstreamClosedCum", "UpstreamRestoredCum")
summary(a4)

#take a weighted average by streamflow
a5<-collap(a3, UpstreamClosedCum+UpstreamRestoredCum~YearMonth+huc12,
           FUN=function(x, w) wtd.mean(x, weights = w, na.rm = TRUE),
           w = a3$streamflow)

colnames(a5)<-c("streamflow", "YearMonth", "huc12", "UpstreamClosedCumSf", "UpstreamRestoredCumSf")
summary(a5)
a5<-subset(a5, select=-c(streamflow))
a5 <- a5 %>% mutate_all(~ifelse(is.nan(.), NA, .))


#merge it back into the main dataset
merge<-merge(merge, a4, by.x=c("YearMonth", "huc12"), by.y=c("YearMonth", "huc12"),
             all.x=TRUE, all.y=FALSE)

merge<-merge(merge, a5, by.x=c("YearMonth", "huc12"), by.y=c("YearMonth", "huc12"),
             all.x=TRUE, all.y=FALSE)

#if streamflow is missing, use the standard metric. 
table(is.na(merge$UpstreamRestoredCum), is.na(merge$UpstreamRestoredCumSf))
table(is.na(merge$UpstreamClosedCum), is.na(merge$UpstreamClosedCumSf))

merge$UpstreamRestoredCumSf<-ifelse(is.na(merge$UpstreamRestoredCumSf), merge$UpstreamRestoredCum, merge$UpstreamRestoredCumSf)
merge$UpstreamClosedCumSf<-ifelse(is.na(merge$UpstreamClosedCumSf), merge$UpstreamClosedCum, merge$UpstreamClosedCumSf)

table(is.na(merge$UpstreamRestoredCum), is.na(merge$UpstreamRestoredCumSf))
table(is.na(merge$UpstreamClosedCum), is.na(merge$UpstreamClosedCumSf))

#create a proportion variable 
merge$UpstreamClosedCumP<-merge$UpstreamClosedCum/merge$areaacres
merge$UpstreamRestoredCumP<-merge$UpstreamRestoredCum/merge$areaacres

merge$UpstreamClosedCumSfP<-merge$UpstreamClosedCumSf/merge$areaacres
merge$UpstreamRestoredCumSfP<-merge$UpstreamRestoredCumSf/merge$areaacres

merge<-merge %>% relocate(UpstreamClosedCumSf, .after=UpstreamClosedCum)
merge<-merge %>% relocate(UpstreamRestoredCumSf, .after=UpstreamRestoredCum)

merge<-merge %>% relocate(UpstreamClosedCumSfP, .after=UpstreamClosedCumP)
merge<-merge %>% relocate(UpstreamRestoredCumSfP, .after=UpstreamRestoredCumP)


#Upstream Water Quality
wq_upstream<-subset(wq_huc12, select=c("huc12", "YearMonth", "Year", "Month",
                                       "ammonia_filtered", "ammonia_unfiltered",
                                       "TP_filtered","TP_unfiltered",
                                       "nitrate_filtered", "nitrate_unfiltered",
                                       "TKN_filtered", "TKN_unfiltered"))

wq_upstream<-merge(wq_upstream, streamflow_year, by=c('huc12', "YearMonth"), all.x=TRUE, all.y=FALSE)

#fill in with the month huc12 average for the missing values
wq_upstream <-wq_upstream %>% group_by(huc12, Month) %>% mutate(streamflow=ifelse(is.na(streamflow), mean(streamflow, na.rm=TRUE), streamflow))
summary(wq_upstream)

wq_upstream_merge<-merge(a2, wq_upstream, by.x=c("upstream", "YearMonth"), by.y=c("huc12", "YearMonth"), all.x=FALSE, all.y=FALSE)

#then we collapse by Year-Month and huc12 to get a unique observation for upstream water quality
wq_upstream_coll<-collap(subset(wq_upstream_merge, select=-c(upstream)), 
                         ammonia_filtered+ammonia_unfiltered+TP_filtered+TP_unfiltered+
                           nitrate_filtered+nitrate_unfiltered+TKN_filtered+TKN_unfiltered~YearMonth+huc12, FUN=fmean)

wq_upstream_coll_weighted<-collap(subset(wq_upstream_merge, select=-c(upstream)), 
                                  ammonia_filtered+ammonia_unfiltered+TP_filtered+
                                    TP_unfiltered+nitrate_filtered+nitrate_unfiltered+
                                    TKN_filtered+TKN_unfiltered~YearMonth+huc12, 
                                  FUN=function(x, w) wtd.mean(x, weights = w, na.rm = TRUE),
                                  w = wq_upstream_merge$streamflow)

#rename it
wq_upstream_coll <- wq_upstream_coll %>%
  rename_at(vars(3:10), ~ paste0("upstream_", .))

wq_upstream_coll_weighted <- wq_upstream_coll_weighted %>%
  rename_at(vars(4:11), ~ paste0("upstream_sf_", .))

#merge them together
wq_upstream_all<-merge(wq_upstream_coll, wq_upstream_coll_weighted, by=c("huc12", "YearMonth"), all.x=TRUE, all.y=TRUE)
wq_upstream_all<-subset(wq_upstream_all, select=-c(streamflow))
wq_upstream_all <- wq_upstream_all %>% mutate_all(~ifelse(is.nan(.), NA, .))

table(wq_upstream_all$upstream_ammonia_filtered==wq_upstream_all$upstream_sf_ammonia_filtered)
#most of the time they are the same. that is becuase there is usually only one value in an upstream huc12

#merge it into the main dataset
merge<-merge(merge, wq_upstream_all, by=c("YearMonth", "huc12"), all.x=TRUE, all.y=FALSE)

#if stream flow is missing, use the standard upstream water quality measure
merge$upstream_sf_ammonia_filtered<-ifelse(is.na(merge$upstream_sf_ammonia_filtered), merge$upstream_ammonia_filtered, merge$upstream_sf_ammonia_filtered)
merge$upstream_sf_ammonia_unfiltered<-ifelse(is.na(merge$upstream_sf_ammonia_unfiltered), merge$upstream_ammonia_unfiltered, merge$upstream_sf_ammonia_unfiltered)
merge$upstream_sf_TP_filtered<-ifelse(is.na(merge$upstream_sf_TP_filtered), merge$upstream_TP_filtered, merge$upstream_sf_TP_filtered)
merge$upstream_sf_TP_unfiltered<-ifelse(is.na(merge$upstream_sf_TP_unfiltered), merge$upstream_TP_unfiltered, merge$upstream_sf_TP_unfiltered)
merge$upstream_sf_nitrate_filtered<-ifelse(is.na(merge$upstream_sf_nitrate_filtered), merge$upstream_nitrate_filtered, merge$upstream_sf_nitrate_filtered)
merge$upstream_sf_nitrate_unfiltered<-ifelse(is.na(merge$upstream_sf_nitrate_unfiltered), merge$upstream_nitrate_unfiltered, merge$upstream_sf_nitrate_unfiltered)
merge$upstream_sf_TKN_filtered<-ifelse(is.na(merge$upstream_sf_TKN_filtered), merge$upstream_TKN_filtered, merge$upstream_sf_TKN_filtered)

                                           
#Part XIV
#Create additional variables 
merge$huc8year<-paste0(merge$huc8, merge$Year, sep="")
merge$huc4<-substring(merge$huc8, 1, 4)
merge$huc4year<-paste0(merge$huc4, merge$Year, sep="")
merge$huc4month<-paste0(merge$huc4, merge$Month, sep="")
merge$vegetation<-merge$grass_shrub+merge$tree_cover
merge$RestoredBinary<-ifelse(merge$RestoredCumP>0,1,0)
merge$RestoredBinaryUpstream<-ifelse(merge$UpstreamRestoredCumP>0,1,0)

#variables for event study models 
merge <- merge %>% group_by(huc12) %>% mutate(MaxEasement=max(RestoredCumP))
summary(merge$MaxEasement)
merge <- merge %>% group_by(huc12) %>% mutate(MaxClosed = max(ClosedCumP))
summary(merge$MaxClosed)
table(is.na(merge$MaxEasement))
table(is.na(merge$MaxClosed))

merge$EverTreated<-ifelse(merge$MaxEasement>0, 1, 0)
table(merge$EverTreated)

#create treatment "on" after huc 12 gets first easement
#restored
merge$EasementYear<-ifelse(merge$RestoredCum>0, 1, 0)
table(merge$EasementYear)
#closed
merge$ClosedYear<-ifelse(merge$ClosedCum>0, 1, 0)
table(merge$ClosedYear)


#year of first treatment
#restored
merge<- merge %>% 
  group_by(huc12) %>% 
  mutate(first.treat = min(
    if_else(EasementYear == 1, as.numeric(YearMonth), 999999), na.rm = TRUE))
#closed
merge<- merge %>% 
  group_by(huc12) %>% 
  mutate(first.closed = min(
    if_else(ClosedYear == 1, as.numeric(YearMonth), 999999), na.rm = TRUE))

table(merge$first.treat)
table(merge$first.closed)

#restored
merge$first.treat.year<-substring(merge$first.treat, 1 , 4)
merge$first.treat.month<-substring(merge$first.treat, 5 , 6)
#closed
merge$first.closed.year<-substring(merge$first.closed, 1 , 4)
merge$first.closed.month<-substring(merge$first.closed, 5 , 6)

table(merge$first.treat.year)
table(merge$first.treat.month)
table(merge$first.closed.year)

#restored
merge$diff.year<-ifelse(merge$first.treat==999999, 0, as.numeric(merge$Year)-as.numeric(merge$first.treat.year))
merge$diff.month<-ifelse(merge$first.treat==999999, 0, as.numeric(merge$Month)-as.numeric(merge$first.treat.month))
#closed 
merge$closed.diff.year<-ifelse(merge$first.closed==999999, 0, as.numeric(merge$Year)-as.numeric(merge$first.closed.year))
merge$closed.diff.month<-ifelse(merge$first.closed==999999, 0, as.numeric(merge$Month)-as.numeric(merge$first.closed.month))

table(merge$diff.year)
table(merge$closed.diff.year)
options(max.print=99999)
#visually inspect obs in each year to treatment and actual year
table(merge$diff.year, merge$Year)
table(merge$diff.month)
table(merge$closed.diff.year, merge$Year)

#months since treatment
#restored
merge$TTT<-ifelse(merge$first.treat==999999, 0, 12*merge$diff.year + merge$diff.month)
#closed
merge$TTT.closed<-ifelse(merge$first.closed==999999, 0, 12*merge$closed.diff.year + merge$closed.diff.month)
#visually inspect obs in each year to treatment and actual year
#table(merge$TTT, merge$Year)

#years since treatment
#divide by 12 and round 
#restored
merge$TTTyear<-merge$TTT/12
table(merge$TTTyear)
#closed
merge$TTT.closedyear<-merge$TTT.closed/12
table(merge$TTT.closedyear)


#floor for all numbers
merge$TTTyear2<-floor(merge$TTTyear)
#visually inspect observations in each year to treatment and actual year 
table(merge$TTTyear2, merge$Year)

#closed
merge$TTT.closedyear2<-floor(merge$TTT.closedyear)
#visually inspect observations in each year to treatment and actual year 
table(merge$TTT.closedyear2, merge$Year)


#put the end periods into bins.
#restored
merge$TTTyear2<-ifelse(merge$TTTyear2< -5, -5, merge$TTTyear2)
merge$TTTyear2<-ifelse(merge$TTTyear2>10, 10, merge$TTTyear2)
table(merge$TTTyear2)
table(merge$TTTyear2, merge$Year)
#closed
merge$TTT.closedyear2<-ifelse(merge$TTT.closedyear2< -5, -5, merge$TTT.closedyear2)
merge$TTT.closedyear2<-ifelse(merge$TTT.closedyear2>10, 10, merge$TTT.closedyear2)

#code to inspect the rounding rules 
#temp <- subset(merge, merge$TTTyear > -2)
#temp <- subset(temp, temp$TTTyear < 2)
#table(temp$TTTyear2, temp$TTTyear)


#Part XV
#Add new TRI data
colnames(tri_huc12_panel)<-c("huc12", "Year", "tri_ammonia", "tri_ammonia_water", 
                             "tri_nitrate", "tri_nitrate_water", 
                             "tri_phosphorus", "tri_phosphorus_water")

tri_huc12_panel$Year<-as.character(tri_huc12_panel$Year)


merge<-merge(merge, tri_huc12_panel, by=c("huc12", "Year"), all.x=TRUE, all.y=FALSE)
summary(merge)

#the rest are zeros
merge$tri_ammonia<-ifelse(is.na(merge$tri_ammonia), 0, merge$tri_ammonia)
merge$tri_ammonia_water<-ifelse(is.na(merge$tri_ammonia_water), 0, merge$tri_ammonia_water)

merge$tri_nitrate<-ifelse(is.na(merge$tri_nitrate), 0, merge$tri_nitrate)
merge$tri_nitrate_water<-ifelse(is.na(merge$tri_nitrate_water), 0, merge$tri_nitrate_water)

merge$tri_phosphorus<-ifelse(is.na(merge$tri_phosphorus), 0, merge$tri_phosphorus)
merge$tri_phosphorus_water<-ifelse(is.na(merge$tri_phosphorus_water), 0, merge$tri_phosphorus_water)


#Part XVI New variables for models
merge$ihs_restored <- asinh(merge$RestoredCum)
merge$RestoredCumPSq <- merge$RestoredCumP^2 
merge$ihs_upstream_restored <- asinh(merge$UpstreamRestoredCum)
merge$ihs_upstream_restored_sf<-asinh(merge$UpstreamRestoredCumSf)

merge$ihs_ammonia <- asinh(merge$ammonia_filtered)
merge$ihs_TP <- asinh(merge$TP_filtered)
merge$ihs_TKN <- asinh(merge$TKN_filtered)

merge$ihs_upstream_ammonia <- asinh(merge$upstream_ammonia_filtered)
merge$ihs_upstream_TP <- asinh(merge$upstream_TP_filtered)
merge$ihs_upstream_TKN <- asinh(merge$upstream_TKN_filtered)

merge$ihs_upstream_ammonia_sf <- asinh(merge$upstream_sf_ammonia_filtered)
merge$ihs_upstream_TP_sf <- asinh(merge$upstream_sf_TP_filtered)
merge$ihs_upstream_TKN_sf <- asinh(merge$upstream_sf_TKN_filtered)

merge$month_num <- as.numeric(merge$Month)
merge$month_factor <- is.factor(merge$month_num)
merge$no_river <- ifelse(as.numeric(merge$distancetoriver_meters) > 1, 1, 0)
merge$HUC8<-as.numeric(merge$huc8)

###############################################################################
#save it as the main dataset
save(merge, file="Main/main_huc12_panel.RData")
load("Main/main_huc12_panel.RData")

summary(merge)
